if(!file.exists('./../Data/Chow_preprocessed.RData')){
# Download and load Microarray_ASD_Chow_normalized.RData
datMeta$Disease.status = datMeta$DX
# Perform Differential Expression Analysis
SAM_fit = SAM(as.matrix(datExpr), as.factor(datMeta$Disease.status), resp.type = 'Two class unpaired',
geneid = rownames(datExpr), random.seed = 1234, logged2 = TRUE, fdr.output = 1)
genes_up = SAM_fit$siggenes.table$genes.up %>% data.frame
genes_down = SAM_fit$siggenes.table$genes.lo %>% data.frame
DE_info = genes_up %>% bind_rows(genes_down)
# Load SFARI information
SFARI_genes = read_csv('./../../Gandal/RNAseq/working_data/SFARI_genes_01-15-2019.csv')
ilmn_gene_map = mapIds(illuminaHumanv4.db, key=rownames(datExpr),
column=c('SYMBOL'), keytype='PROBEID', multiVals='filter')
SFARI_genes = data.frame('ID'=names(ilmn_gene_map), 'gene-symbol'=unname(ilmn_gene_map)) %>%
right_join(SFARI_genes, by=c('gene.symbol'='gene-symbol'))
# Add neuronal annotations
GO_annotations = read.csv('./../../Gandal/RNAseq/working_data/genes_GO_annotations.csv')
GO_neuronal = GO_annotations %>% filter(grepl('neuron', go_term)) %>%
distinct(ensembl_gene_id) %>%
mutate('Neuronal' = 1)
mart = useMart(biomart='ENSEMBL_MART_ENSEMBL', dataset='hsapiens_gene_ensembl',
host='feb2014.archive.ensembl.org')
ensembl_gene_map = getBM(attributes=c('ensembl_gene_id', 'hgnc_symbol'), filters=c('ensembl_gene_id'),
values=GO_neuronal$ensembl_gene_id, mart=mart)
ensembl_ilmn_gene_map = data.frame('ID' = names(ilmn_gene_map), 'gene.symbol'=unname(ilmn_gene_map)) %>%
full_join(ensembl_gene_map, by=c('gene.symbol'='hgnc_symbol'))
GO_neuronal = GO_neuronal %>% left_join(ensembl_ilmn_gene_map, by='ensembl_gene_id') %>%
filter(!is.na(ID))
save(datExpr, datMeta, GO_neuronal, SFARI_genes, DE_info, file='./../Data/Chow_preprocessed.RData')
} else load('./../Data/Chow_preprocessed.RData')
datExpr_backup = datExpr
DE_info = DE_info %>% mutate('logFC'= Fold.Change, 'ID' = Gene.Name)
datMeta = datMeta %>% mutate('Diagnosis_' = ifelse(DX=='Autism','autism','control'))
rm(maxFC)
Number of genes and samples:
dim(datExpr)
## [1] 24356 33
Gene count by SFARI score of remaining genes:
table(SFARI_genes$`gene-score`)
##
## 1 2 3 4 5 6
## 41 96 276 660 279 35
Relation between SFARI scores and Neuronal functional annotation:
Neuronal_SFARI = data.frame('ID'=rownames(datExpr), 'Neuronal'=rownames(datExpr) %in% GO_neuronal$ID) %>%
left_join(SFARI_genes, by='ID')
tbl = table(Neuronal_SFARI$`gene-score`, Neuronal_SFARI$Neuronal, useNA='ifany') %>% t %>% as.data.frame.matrix
rownames(tbl) = c('Non-Neuronal','Neuronal')
tbl
## 1 2 3 4 5 6 NA
## Non-Neuronal 25 66 206 521 205 27 21531
## Neuronal 13 27 61 112 69 7 1487
tbl = round(sweep(tbl, 2, colSums(tbl), `/`)*100, 2)
tbl
## 1 2 3 4 5 6 NA
## Non-Neuronal 65.79 70.97 77.15 82.31 74.82 79.41 93.54
## Neuronal 34.21 29.03 22.85 17.69 25.18 20.59 6.46
rm(Neuronal_SFARI, tbl)
make_ASD_vs_CTL_df = function(datExpr){
datExpr_ASD = datExpr %>% data.frame %>% dplyr::select(which(datMeta$Diagnosis_=='autism'))
datExpr_CTL = datExpr %>% data.frame %>% dplyr::select(which(datMeta$Diagnosis_=='control'))
ASD_vs_CTL = data.frame('ID'=as.character(rownames(datExpr)),
'mean' = rowMeans(datExpr), 'sd' = apply(datExpr, 1, sd),
'mean_ASD'=rowMeans(datExpr_ASD), 'mean_CTL'=rowMeans(datExpr_CTL),
'sd_ASD'=apply(datExpr_ASD,1,sd), 'sd_CTL'=apply(datExpr_CTL,1,sd)) %>%
mutate('mean_diff'=mean_ASD-mean_CTL, 'sd_diff'=sd_ASD-sd_CTL) %>%
left_join(SFARI_genes, by='ID') %>%
dplyr::select(ID, mean, sd, mean_ASD, mean_CTL, mean_diff, sd_ASD, sd_CTL, sd_diff, `gene-score`) %>%
mutate('Neuronal'=ifelse(ID %in% GO_neuronal$ID, 'Neuronal', 'Non-Neuronal'))
ASD_vs_CTL_SFARI = ASD_vs_CTL %>% filter(!is.na(`gene-score`)) %>%
mutate(Group = as.character(`gene-score`)) %>% dplyr::select(-c(Neuronal,`gene-score`))
ASD_vs_CTL_Neuro = ASD_vs_CTL %>% mutate(Group = Neuronal) %>% dplyr::select(-c(Neuronal,`gene-score`))
ASD_vs_CTL_all = ASD_vs_CTL %>% mutate(Group = 'All') %>% dplyr::select(-c(Neuronal,`gene-score`))
ASD_vs_CTL_together = bind_rows(ASD_vs_CTL_SFARI, ASD_vs_CTL_Neuro, ASD_vs_CTL_all) %>%
mutate(Group = ordered(Group, levels=c('1','2','3','4','5','6',
'Neuronal','Non-Neuronal','All'))) %>%
left_join(DE_info, by='ID')
return(ASD_vs_CTL_together)
}
ASD_vs_CTL = make_ASD_vs_CTL_df(datExpr)
p1 = ggplotly(ggplot(ASD_vs_CTL, aes(Group, mean, fill=Group)) +
geom_boxplot() + theme_minimal() +
ggtitle('Mean (left) and Standard Deviation (right) by score') +
scale_fill_manual(values=gg_colour_hue(9)) +
theme(legend.position='none', axis.text.x=element_text(angle=45, hjust=1)))
p2 = ggplotly(ggplot(ASD_vs_CTL, aes(Group, sd, fill=Group)) +
geom_boxplot() + theme_minimal() +
ggtitle('Mean (left) and Standard Deviation (right) by score') +
scale_fill_manual(values=gg_colour_hue(9)) +
theme(legend.position='none', axis.text.x=element_text(angle=45, hjust=1)))
subplot(p1, p2, nrows=1)
rm(p1, p2)
The relation doesn’t seem to be completely linear, but close to.
ggplot(ASD_vs_CTL, aes(mean, sd)) + geom_point(alpha=0.1, aes(id=ID, fill=Group, color=Group)) +
scale_color_manual(values=gg_colour_hue(9)) + scale_fill_manual(values=gg_colour_hue(9)) +
scale_x_continuous(trans='log2') + scale_y_continuous(trans='log2') +
ggtitle(paste0('Corr=',round(cor(ASD_vs_CTL$mean, ASD_vs_CTL$sd), 2))) +
geom_smooth(method=lm, se=FALSE, color='#808080', size=0.5) + theme_minimal()
p1 = ggplotly(ggplot(ASD_vs_CTL, aes(Group, abs(mean_diff), fill=Group)) +
geom_boxplot() + theme_minimal() +
ggtitle('Difference in mean (left) and log Fold Change (right) by Diagnosis') +
scale_fill_manual(values=gg_colour_hue(9)) +
theme(legend.position='none', axis.text.x=element_text(angle=45, hjust=1)))
p2 = ggplotly(ggplot(ASD_vs_CTL[complete.cases(ASD_vs_CTL),], aes(Group, abs(as.numeric(logFC)), fill=Group)) +
geom_boxplot() + theme_minimal() +
ggtitle('Difference in mean (left) and log Fold Change (right) by Diagnosis') +
scale_fill_manual(values=gg_colour_hue(9)) +
theme(legend.position='none', axis.text.x=element_text(angle=45, hjust=1)))
subplot(p1, p2, nrows=1)
rm(p1, p2)
if(!file.exists('./../Data/Chow_combat.RData')){
# Download and load Microarray_ASD_chow_normalized_CR_cleaned
datMeta$Disease.status = datMeta$DX
# Perform Differential Expression Analysis
SAM_fit = SAM(as.matrix(datExpr), as.factor(datMeta$Disease.status), resp.type = 'Two class unpaired',
geneid = rownames(datExpr), random.seed = 1234, logged2 = TRUE, fdr.output = 1)
genes_up = SAM_fit$siggenes.table$genes.up %>% data.frame
genes_down = SAM_fit$siggenes.table$genes.lo %>% data.frame
DE_info = genes_up %>% bind_rows(genes_down)
# Load SFARI information
SFARI_genes = read_csv('./../../Gandal/RNAseq/working_data/SFARI_genes_01-15-2019.csv')
mart = useMart(biomart='ENSEMBL_MART_ENSEMBL',
dataset='hsapiens_gene_ensembl',
host='feb2014.archive.ensembl.org') ## Gencode v19
gene_names = getBM(attributes=c('ensembl_gene_id', 'hgnc_symbol'), filters=c('hgnc_symbol'),
values=SFARI_genes$`gene-symbol`, mart=mart) %>%
mutate('gene-symbol'=hgnc_symbol, 'ID'=as.character(ensembl_gene_id)) %>%
dplyr::select('ID', 'gene-symbol')
SFARI_genes = left_join(SFARI_genes, gene_names, by='gene-symbol')
# Add neuronal annotations
GO_annotations = read.csv('./../../Gandal/RNAseq/working_data/genes_GO_annotations.csv')
GO_neuronal = GO_annotations %>% filter(grepl('neuron', go_term)) %>%
distinct(ensembl_gene_id) %>%
mutate('Neuronal' = 1, 'ID'=ensembl_gene_id)
save(datExpr, datMeta, GO_neuronal, SFARI_genes, DE_info, file='./../Data/Chow_combat.RData')
} else load('./../Data/Chow_combat.RData')
datExpr_backup = datExpr
DE_info = DE_info %>% mutate('logFC'= Fold.Change, 'ID' = Gene.Name)
datMeta = datMeta %>% mutate('Diagnosis_' = ifelse(DX=='Autism','autism','control'))
Number of genes and samples:
dim(datExpr)
## [1] 17643 30
Gene count by SFARI score of remaining genes:
table(SFARI_genes$`gene-score`)
##
## 1 2 3 4 5 6
## 29 82 209 538 191 25
Relation between SFARI scores and Neuronal functional annotation:
Neuronal_SFARI = data.frame('ID'=rownames(datExpr), 'Neuronal'=rownames(datExpr) %in% GO_neuronal$ID) %>%
left_join(SFARI_genes, by='ID')
tbl = table(Neuronal_SFARI$`gene-score`, Neuronal_SFARI$Neuronal, useNA='ifany') %>% t %>% as.data.frame.matrix
rownames(tbl) = c('Non-Neuronal','Neuronal')
tbl
## 1 2 3 4 5 6 NA
## Non-Neuronal 13 41 141 343 126 20 15831
## Neuronal 8 16 40 72 36 4 953
tbl = round(sweep(tbl, 2, colSums(tbl), `/`)*100, 2)
tbl
## 1 2 3 4 5 6 NA
## Non-Neuronal 61.9 71.93 77.9 82.65 77.78 83.33 94.32
## Neuronal 38.1 28.07 22.1 17.35 22.22 16.67 5.68
rm(Neuronal_SFARI, tbl)
make_ASD_vs_CTL_df = function(datExpr){
datExpr_ASD = datExpr %>% data.frame %>% dplyr::select(which(datMeta$Diagnosis_=='autism'))
datExpr_CTL = datExpr %>% data.frame %>% dplyr::select(which(datMeta$Diagnosis_=='control'))
ASD_vs_CTL = data.frame('ID'=as.character(rownames(datExpr)),
'mean' = rowMeans(datExpr), 'sd' = apply(datExpr, 1, sd),
'mean_ASD'=rowMeans(datExpr_ASD), 'mean_CTL'=rowMeans(datExpr_CTL),
'sd_ASD'=apply(datExpr_ASD,1,sd), 'sd_CTL'=apply(datExpr_CTL,1,sd)) %>%
mutate('mean_diff'=mean_ASD-mean_CTL, 'sd_diff'=sd_ASD-sd_CTL) %>%
left_join(SFARI_genes, by='ID') %>%
dplyr::select(ID, mean, sd, mean_ASD, mean_CTL, mean_diff, sd_ASD, sd_CTL, sd_diff, `gene-score`) %>%
mutate('Neuronal'=ifelse(ID %in% GO_neuronal$ID, 'Neuronal', 'Non-Neuronal'))
ASD_vs_CTL_SFARI = ASD_vs_CTL %>% filter(!is.na(`gene-score`)) %>%
mutate(Group = as.character(`gene-score`)) %>% dplyr::select(-c(Neuronal,`gene-score`))
ASD_vs_CTL_Neuro = ASD_vs_CTL %>% mutate(Group = Neuronal) %>% dplyr::select(-c(Neuronal,`gene-score`))
ASD_vs_CTL_all = ASD_vs_CTL %>% mutate(Group = 'All') %>% dplyr::select(-c(Neuronal,`gene-score`))
ASD_vs_CTL_together = bind_rows(ASD_vs_CTL_SFARI, ASD_vs_CTL_Neuro, ASD_vs_CTL_all) %>%
mutate(Group = ordered(Group, levels=c('1','2','3','4','5','6',
'Neuronal','Non-Neuronal','All'))) %>%
left_join(DE_info, by='ID')
return(ASD_vs_CTL_together)
}
ASD_vs_CTL = make_ASD_vs_CTL_df(datExpr)
p1 = ggplotly(ggplot(ASD_vs_CTL, aes(Group, mean, fill=Group)) +
geom_boxplot() + theme_minimal() +
ggtitle('Mean (left) and Standard Deviation (right) by score') +
scale_fill_manual(values=gg_colour_hue(9)) +
theme(legend.position='none', axis.text.x=element_text(angle=45, hjust=1)))
p2 = ggplotly(ggplot(ASD_vs_CTL, aes(Group, sd, fill=Group)) +
geom_boxplot() + theme_minimal() +
ggtitle('Mean (left) and Standard Deviation (right) by score') +
scale_fill_manual(values=gg_colour_hue(9)) +
theme(legend.position='none', axis.text.x=element_text(angle=45, hjust=1)))
subplot(p1, p2, nrows=1)
rm(p1, p2)
ggplot(ASD_vs_CTL, aes(mean, sd)) + geom_point(alpha=0.1, aes(id=ID, fill=Group, color=Group)) +
scale_color_manual(values=gg_colour_hue(9)) + scale_fill_manual(values=gg_colour_hue(9)) +
scale_x_continuous(trans='log2') + scale_y_continuous(trans='log2') +
ggtitle(paste0('Corr=',round(cor(ASD_vs_CTL$mean, ASD_vs_CTL$sd), 2))) +
geom_smooth(method=lm, se=FALSE, color='#808080', size=0.5) + theme_minimal()
p1 = ggplotly(ggplot(ASD_vs_CTL, aes(Group, abs(mean_diff), fill=Group)) +
geom_boxplot() + theme_minimal() +
ggtitle('Difference in mean (left) and log Fold Change (right) by Diagnosis') +
scale_fill_manual(values=gg_colour_hue(9)) +
theme(legend.position='none', axis.text.x=element_text(angle=45, hjust=1)))
p2 = ggplotly(ggplot(ASD_vs_CTL[complete.cases(ASD_vs_CTL),], aes(Group, abs(as.numeric(logFC)), fill=Group)) +
geom_boxplot() + theme_minimal() +
ggtitle('Difference in mean (left) and log Fold Change (right) by Diagnosis') +
scale_fill_manual(values=gg_colour_hue(9)) +
theme(legend.position='none', axis.text.x=element_text(angle=45, hjust=1)))
subplot(p1, p2, nrows=1)
rm(p1, p2)